library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)
0 1 442 446
dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL
trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]
pander::pander(table(dataColonTrain$status))
pander::pander(table(dataColonTest$status))
Modeling
ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)
[+++-+++++++-+++-+++-+++-+++-+++-+++-+++-]…
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
| age_nodes |
5.32e-04 |
1.000 |
1.001 |
1.001 |
0.615 |
| node4 |
2.97e-01 |
1.208 |
1.345 |
1.498 |
0.610 |
| age_node4 |
2.54e-03 |
1.002 |
1.003 |
1.004 |
0.610 |
| extent |
2.29e-01 |
1.120 |
1.257 |
1.411 |
0.560 |
| rxLev_5FU_age |
-2.79e-03 |
0.996 |
0.997 |
0.999 |
0.565 |
| rxLev_5FU |
-8.99e-02 |
0.866 |
0.914 |
0.965 |
0.565 |
| adhere |
1.42e-01 |
1.033 |
1.153 |
1.287 |
0.538 |
| nodes |
8.50e-03 |
1.002 |
1.009 |
1.015 |
0.617 |
| age_adhere |
-9.51e-04 |
0.998 |
0.999 |
1.000 |
0.538 |
| age_extent |
1.96e-10 |
1.000 |
1.000 |
1.000 |
0.535 |
Table continues below
| age_nodes |
0.523 |
0.614 |
0.616 |
0.523 |
0.616 |
| node4 |
0.607 |
0.625 |
0.612 |
0.607 |
0.626 |
| age_node4 |
0.590 |
0.607 |
0.612 |
0.589 |
0.609 |
| extent |
0.610 |
0.625 |
0.557 |
0.612 |
0.626 |
| rxLev_5FU_age |
0.618 |
0.625 |
0.564 |
0.620 |
0.626 |
| rxLev_5FU |
0.609 |
0.607 |
0.564 |
0.611 |
0.609 |
| adhere |
0.614 |
0.614 |
0.541 |
0.615 |
0.615 |
| nodes |
0.614 |
0.625 |
0.618 |
0.615 |
0.626 |
| age_adhere |
0.614 |
0.615 |
0.541 |
0.615 |
0.616 |
| age_extent |
0.504 |
0.535 |
0.534 |
0.500 |
0.534 |
| age_nodes |
0.03421 |
0.451 |
6.32 |
6.13 |
0.092524 |
1.0 |
| node4 |
0.03346 |
0.435 |
5.45 |
6.26 |
0.019414 |
1.0 |
| age_node4 |
0.03106 |
0.417 |
5.33 |
6.04 |
0.019620 |
1.0 |
| extent |
0.01940 |
0.221 |
3.89 |
4.02 |
0.014547 |
1.0 |
| rxLev_5FU_age |
0.01388 |
0.255 |
3.52 |
3.42 |
0.006196 |
1.0 |
| rxLev_5FU |
0.01245 |
0.255 |
3.26 |
3.42 |
-0.001775 |
1.0 |
| adhere |
0.00658 |
0.189 |
2.53 |
3.24 |
-0.000363 |
0.7 |
| nodes |
0.00396 |
0.155 |
2.37 |
2.02 |
0.010566 |
1.0 |
| age_adhere |
0.00525 |
0.402 |
2.25 |
5.58 |
0.001831 |
0.1 |
| age_extent |
0.00569 |
0.137 |
2.05 |
1.72 |
0.034241 |
0.1 |
Cox Model Performance
Here we evaluate the model using the RRPlot() function.
The evaluation of the raw Cox model with RRPlot()
Here we will use the predicted event probability assuming a baseline
hazard for events withing 5 years
index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)
h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))
rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Raw Train: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
Cross-Validation
Here we will cross validate the training set and evaluate also on the
testing set. The h0 and the timeinterval are the ones estimated on the
calibration process
rcv <- randomCV(theData=dataColonTrain,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.75,
repetitions=50,
classSamplingType = "Pro",
testingSet=dataColonTest
)
.[++++++].[++++++].[+++-].[+++].[++++].[++++].[+++-].[+++].[++++].[++++]10
Tested: 850 Avg. Selected: 9 Min Tests: 1 Max Tests: 10 Mean Tests:
4.976471 . MAD: 0.473827
.[++++].[++-].[+++–].[++++].[+++-].[++++-].[++++].[+++++].[+++-].[+++]20
Tested: 886 Avg. Selected: 9 Min Tests: 1 Max Tests: 20 Mean Tests:
9.548533 . MAD: 0.4725908
.[++].[+++-].[++++].[+++-].[+++++].[+++-].[++++].[+++-].[++++].[+++-]30
Tested: 888 Avg. Selected: 8.733333 Min Tests: 1 Max Tests: 30 Mean
Tests: 14.29054 . MAD: 0.4726087
.[++].[++++-].[++++-].[+++].[++-].[+++++++].[+++++++].[++++].[+++-].[++++]40
Tested: 888 Avg. Selected: 8.65 Min Tests: 3 Max Tests: 40 Mean Tests:
19.05405 . MAD: 0.4729596
.[+++-].[+++].[+++].[++++-].[+++-].[++++-].[+++].[+++-].[+++-].[++++-]50
Tested: 888 Avg. Selected: 8.6 Min Tests: 4 Max Tests: 50 Mean Tests:
23.81757 . MAD: 0.4729532
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=times,
title="CV Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)





